Figures

Showcase

library(here)
source(here("setup.R"))

Parameters

theta_showcase <- read_csv(here("data/results/4_showcase_model/thetas.csv")) %>%
    rename("\\rho" = 2)

standard_errors <- read_csv(here("data/results/4_showcase_model/standard_errors.csv"))
standard_errors

variances <- theta_showcase %>%
    mutate(across(where(is.numeric), ~ exp(.x)))

sds <- variances %>%
    mutate(across(where(is.numeric), ~ sqrt(.x)))
sds

var_log_M <- variances %>%
    filter(method == "MLE") %>%
    pull(M)

var_M <- exp(var_log_M) - 1
sd_M <- sqrt(var_M)

c("var_M" = var_M, "sd_M" = sd_M)
A spec_tbl_df: 5 × 2
parameter standard_error
<chr> <dbl>
log_rho 0.0003029599
W 0.0004857571
q 0.0232959030
M 0.0028312903
W_q 0.0399133839
A tibble: 3 × 6
method \rho W q M W_q
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
manual 0.00100000 0.10000000 0.5000000 0.0100000 0.1000000
initial 0.01482601 0.02379713 0.1203075 0.1360824 0.8124226
MLE 0.01482571 0.02379714 0.1208721 0.1360977 0.8133194
var_M
0.0186951937455615
sd_M
0.136730368775783

tables/showcase-parameters.tex

# use scientific notation
sds_renamed <- sds %>%
    rename_with(function(col) str_glue("$\\hat\\sigma_{{ {col} }}$"), where(is.numeric))
    
sds_renamed %>%
    kable(format = "latex", format.args = list(scipen = 2, digits = 2), booktabs = T, escape = F) %>%
    cat(., file = here("tables/showcase-parameters.tex"))

Raw data

rep_tri <- read_csv(here("data/processed/RKI_4day_rt.csv"))

Predictions

df_showcase <- read_predictions(
    here("data/results/4_showcase_model/predictions.npy"),
    seq(ymd("2020-04-05"), ymd("2020-09-01"), by = "1 day"),
    c("I", "I_corrected", "$\\rho$", "M", "W", "running_W", "$p^s_1$", "$p^s_2$", "$p^s_3$", "$p^s_4$", "$p_1$", "$p_2$", "$p_3$", "$p_4$", "$W_{q_1}", "W_{q_2}", "W_{q_3}")
)

figures/showcase_prediction_intervals_I_rho.tex

df_showcase %>%
    filter(variable %in% c("$W_{q_1}","W_{q_2}", "W_{q_3}")) %>%
    filter(date >= ymd('2020-07-01'), date <= ymd("2020-08-01")) %>%
    ggplot(aes(date, mean, color=variable)) +
    geom_line() +
    #scale_y_log10() +
    facet_wrap(~variable, ncol=1, scales="free_y")
    
rep_tri %>%
    filter(county_date >= ymd('2020-07-01'), county_date <= ymd("2020-08-01"))
A spec_tbl_df: 32 × 5
county_date 1 2 3 4
<date> <dbl> <dbl> <dbl> <dbl>
2020-07-01 300 138 25 0
2020-07-02 298 143 7 7
2020-07-03 256 87 28 31
2020-07-04 141 88 51 14
2020-07-05 75 63 1 1
2020-07-06 170 125 11 0
2020-07-07 236 134 18 1
2020-07-08 299 131 12 2
2020-07-09 263 140 6 1
2020-07-10 221 112 21 0
2020-07-11 129 81 0 64
2020-07-12 55 0 75 1
2020-07-13 10 303 7 1
2020-07-14 224 199 28 3
2020-07-15 303 208 14 1
2020-07-16 351 171 3 3
2020-07-17 345 50 32 68
2020-07-18 139 159 80 17
2020-07-19 55 98 19 2
2020-07-20 257 124 10 24
2020-07-21 276 177 41 13
2020-07-22 381 259 38 1
2020-07-23 445 209 4 0
2020-07-24 488 138 19 92
2020-07-25 162 232 110 26
2020-07-26 106 81 19 1
2020-07-27 309 168 26 13
2020-07-28 400 279 50 9
2020-07-29 561 303 33 1
2020-07-30 488 318 19 7
2020-07-31 523 181 39 147
2020-08-01 233 197 90 26

fmt_numbers <- function(x) {
  s <- formatC(x, format = "f", big.mark = ",", digits = 0)
  gsub(",", "\\\\,", s)  # escape backslash for LaTeX
}

delta_t <- 4
df_corrected <- df_showcase %>%
    filter(date >= min(date) + delta_t, date <= max(date) - delta_t) %>%
    filter(variable %in% c("I_corrected", "I")) %>%
    group_by(variable) %>%
    summarize(total = sum(mean)) %>%
    rbind(
    rep_tri %>%
        filter(county_date <= max(df_showcase$date) - delta_t, county_date >= min(df_showcase$date) + delta_t) %>%
        summarize(total = sum(`1` + `2` + `3` + `4`)) %>%
        mutate(variable="truth")
    ) %>%
    mutate(
        header = case_when(
            variable == 'I' ~ "$\\sum_{t = 4}^{T-4} I_t$",
            variable == 'I_corrected' ~ "$\\sum_{t = 4}^{T-4} \\bar W_t I_t$",
            variable == 'truth' ~ "$\\sum_{t = 4}^{T-4} Y_t$"
        )
    ) %>%
    select(" " = header, total)

df_corrected %>%
    mutate(across(where(is.numeric), function(x) round(x, -3))) %>%
    mutate(across(where(is.numeric), fmt_numbers)) %>%
    kable('latex', escape = F, booktabs=T) %>%
    cat(., file = here("tables/total_calculation.tex"))
total_df <- rep_tri %>%
    mutate(total = `1` + `2` + `3` + `4`) %>%
    select(date = county_date, total)

plt_I <- df_showcase %>%
    filter(variable == "I") %>%
    ggplot(aes(date, mean)) +
    geom_ribbon(aes(date, ymin = `0.025`, ymax = `0.975`), fill = "darkgreen", alpha = .3) +
    geom_line(aes(linetype = "mean prediction")) +
    geom_line(aes(y = total, linetype = "total reported cases"), data = filter(total_df, date <= max(df_showcase$date))) +
    geom_line(aes(y = mean, linetype = "corrected"), data = filter(df_showcase, variable == "I_corrected")) +
    labs(x = "", y = "$Y_{t,\\cdot}$ / $I_{t}$", linetype = "") +
    scale_x_four_weekly() +
    scale_linetype_manual(values=c("mean prediction" = 1, "total reported cases" = 2, "corrected" = 3))

plt_rho <- df_showcase %>%
    filter(variable == "$\\rho$") %>%
    ggplot(aes(date, mean)) +
    geom_ribbon(aes(date, ymin = `0.025`, ymax = `0.975`), fill = "darkgreen", alpha = .3) +
    geom_line() +
    geom_hline(yintercept = 1, linetype = "dashed", color = "grey80", size = 2) +
    # geom_text(x = min(df_showcase$date), y = 1.00, label = "threshold for \n exponential growth", color = "grey80", vjust = -1, hjust = .15, size = 2) +
    labs(x = "", y = "$\\rho$") +
    scale_y_continuous(
        sec.axis = sec_axis(~ .^7, name = "$\\rho^7$", breaks = round(c(.9, .95, 1., 1.05, 1.1, 1.15)^7, 2))
    ) +
    scale_x_four_weekly()

plt_I / plt_rho + plot_layout(guides = "collect") & theme(legend.position = "top")

ggsave_tikz(here("tikz/showcase_prediction_intervals_I_rho.tex"), height = default_height)
agg_record_706826532: 2

figures/showcase_prediction_intervals.tex

plt_MW <- df_showcase %>%
    filter((variable %in% c("M", "W"))) %>%
    ggplot(aes(date, mean)) +
    geom_ribbon(aes(date, ymin = `0.025`, ymax = `0.975`), fill = "darkgreen", alpha = .3) +
    geom_line() +
    facet_wrap(~variable, scales = "free_y", ncol = 1) +
    labs(x = "", y = "")

plt_p <- df_showcase %>%
    filter(str_starts(variable, "\\$p")) %>%
    mutate(delay = str_extract(variable, "\\d+")) %>%
    select(date, mean, variable, delay) %>%
    mutate(variable = ifelse(str_detect(variable, "s"), "smoothed", "signal")) %>%
    ggplot(aes(date, 100 * mean, color = delay, linetype = variable)) +
    geom_line() +
    scale_y_continuous(breaks = 20 * 0:5, limits = c(0, 1) * 100) +
    scale_linetype_manual(values = c("signal" = "dotted", "smoothed" = "solid")) +
    labs(color = "$\\tau$", x = "", y = "probability of delay [\\%]", linetype = "")

plt_MW / plt_p + plot_layout(guides = "collect") & theme(legend.position = "bottom") & scale_x_four_weekly()
# theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave_tikz(here("tikz/showcase_prediction_intervals.tex"), height = 1.5 * default_height)
agg_record_1559631979: 2

Weekday effect changes slightly over time

df_showcase %>%
    filter(variable == "W") %>%
    pull(mean) %>%
    head(-1) %>%
    matrix(nrow = 7) %>%
    t() %>%
    matplot(type = "l")
legend("bottomright", lty = 1:7, legend = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"), col = 1:7, cex = 0.8, bty = "n")
Warning message in matrix(., nrow = 7):
“data length [149] is not a sub-multiple or multiple of the number of rows [7]”

Weekly average of weekday effects close to 1

df_showcase %>%
    filter(variable == "running_W") %>%
    select(mean, sd) %>%
    head(-3) %>%
    tail(-3) %>%
    summarize(mean(.$mean), mean(.$sd))
A tibble: 1 × 2
mean(.$mean) mean(.$sd)
<dbl> <dbl>
1.057842 0.00642704
df_showcase %>%
    filter(variable == "running_W") %>%
    ggplot(aes(date, mean)) +
    geom_line() +
    geom_ribbon(aes(ymin = `0.025`, ymax = `0.975`), fill = "darkgreen", alpha = .3)

Heavier tails than normal tails in mean M.

mean_Ms <- df_showcase %>%
    filter(variable == "M") %>%
    pull(mean)

mean(mean_Ms)

qqnorm(log(mean_Ms))
qqline(log(mean_Ms))
0.999627559009596

Christmas

parameters

theta_christmas <- read_csv(here("data/results/4_christmas_model/thetas.csv")) %>%
    rename("\\rho" = 2)
theta_christmas_miss <- read_csv(here("data/results/4_christmas_model/thetas_miss.csv")) %>%
    rename("\\rho" = 2)

sds_christmas <- theta_christmas %>%
    mutate(across(where(is.numeric), ~ exp(.x / 2)))
sds_christmas_miss <- theta_christmas_miss %>%
    mutate(across(where(is.numeric), ~ exp(.x / 2)))
se <- read_csv(here("data/results/4_christmas_model/standard_errors_non_miss.csv")) %>%
    pivot_wider(names_from=1, values_from=2) %>%
    rename("\\rho" = 1) %>%
    mutate(method = "standard_error") %>%
    select(method, everything())
se_miss <- read_csv(here("data/results/4_christmas_model/standard_errors_miss.csv")) %>%
    pivot_wider(names_from=1, values_from=2) %>%
    rename("\\rho" = 1) %>%
    mutate(method = "standard_error") %>%
    select(method, everything())

tables/christmas-parameters.tex

# use scientific notation
sds_concat <- sds_christmas %>%
    bind_rows(
        se,
        sds_christmas_miss,
        se_miss
    ) %>%
    rename_with(function(col) str_glue("$\\hat\\sigma_{{ {col} }}$"), where(is.numeric))
sds_concat %>%
    filter(method %in% c("MLE", "standard_error")) %>%
    mutate(scenario = c("all", "all", "C. rem.", "C. rem.")) %>%
    pivot_longer(-c(method, scenario), names_to = "parameter", values_to = "value") %>%
    pivot_wider(names_from = method, values_from = value)  %>%
    mutate(
         lower = MLE - 2 * standard_error,
        upper = MLE + 2 * standard_error
        )
A tibble: 10 × 6
scenario parameter MLE standard_error lower upper
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
all $\hat\sigma_{ \rho }$ 0.012597132 0.0003185590 0.011960014 0.01323425
all $\hat\sigma_{ W }$ 0.031500111 0.0006740364 0.030152038 0.03284818
all $\hat\sigma_{ q }$ 0.375075886 0.0505961058 0.273883675 0.47626810
all $\hat\sigma_{ M }$ 0.110339036 0.0025024413 0.105334153 0.11534392
all $\hat\sigma_{ W_q }$ 0.905485643 0.0470995462 0.811286550 0.99968473
C. rem. $\hat\sigma_{ \rho }$ 0.008689361 0.0015244479 0.005640465 0.01173826
C. rem. $\hat\sigma_{ W }$ 0.027744777 0.0015711518 0.024602473 0.03088708
C. rem. $\hat\sigma_{ q }$ 0.160957656 0.0039757282 0.153006200 0.16890911
C. rem. $\hat\sigma_{ M }$ 0.048454349 0.0044374841 0.039579381 0.05732932
C. rem. $\hat\sigma_{ W_q }$ 0.376345876 0.0212759141 0.333794048 0.41889770
sds_concat %>%
    filter(method %in% c("MLE", "standard_error")) %>%
    mutate(scenario = c("all", "all", "C. rem.", "C. rem.")) %>%
    pivot_longer(-c(method, scenario), names_to = "parameter", values_to = "value") %>%
    pivot_wider(names_from = method, values_from = value)  %>%
    ggplot(aes(parameter, color=scenario)) +
    geom_point(aes(y=MLE), position = position_dodge2(width=.2)) +
    geom_errorbar(aes(ymin=MLE - 2* standard_error, ymax = MLE+ 2 * standard_error), position = position_dodge2(width=.1), width=.2) +
    scale_y_log10() +
    scale_x_discrete(limits = rev) +
    coord_flip() +
    labs(x="", y="", color="")

ggsave_tikz(here("tikz/christmas_parameters_CI.tex"), height=3, width=7.5)
agg_record_127092350: 2

fmt_round <- function(x, k=3) format(round(x, k), nsmall=k)

sds_concat %>%
    mutate(method = paste0(method, ifelse(1:8 < 5, "_all", "_removed"))) %>%
    pivot_longer(-method, names_to = "parameter", values_to = "value") %>%
    pivot_wider(names_from = method, values_from = value) %>%
    select(-manual_removed) %>%
    mutate(
        MLE_all_display = str_glue("{fmt_round(MLE_all)} ({fmt_round(standard_error_all)})"),
        MLE_removed_display = str_glue("{fmt_round(MLE_removed)} ({fmt_round(standard_error_removed)})")
    )  %>%
    mutate(across(where(is.numeric), fmt_round)) %>%
    select(parameter, manual = manual_all, initial_all, initial_removed, MLE_all_display, MLE_removed_display) %>%
    kable(format = "latex", booktabs = T, escape = F, col.names=c(" ", "\\textbf{manual}", "\\textbf{all}", "\\textbf{C. rem.}", "\\textbf{all}", "\\textbf{C. rem.}"), align = c("l", "c", "c", "c", "c", "c")) %>% add_header_above(
        c(" " = 2, "$\\\\theta^0$" = 2, "$\\\\hat\\\\theta$" = 2),
        escape = F, line = TRUE
    ) %>%
    cat(., file = here("tables/christmas-parameters.tex"))

Predictions

df_christmas <- read_predictions(
    here("data/results/4_christmas_model/predictions.npy"),
    seq(ymd("2020-10-01"), ymd("2021-02-28"), by = "1 day"),
    c("I", "$\\rho$", "M", "W", "running_W", "$p^s_1$", "$p^s_2$", "$p^s_3$", "$p^s_4$", "$p_1$", "$p_2$", "$p_3$", "$p_4$", "$W_{q_1}", "W_{q_2}", "W_{q_3}", "y_christmas")
)

df_christmas_missing <- read_predictions(
    here("data/results/4_christmas_model/predictions_miss.npy"),
    seq(ymd("2020-10-01"), ymd("2021-02-28"), by = "1 day"),
    c("I", "$\\rho$", "M", "W", "running_W", "$p^s_1$", "$p^s_2$", "$p^s_3$", "$p^s_4$", "$p_1$", "$p_2$", "$p_3$", "$p_4$", "$W_{q_1}", "W_{q_2}", "W_{q_3}", "y_christmas")
)
missing_indices <- 80:109
dates_missing <- sort(unique(df_christmas$date))[missing_indices]
range(dates_missing)

predictive distribution of total number of cases

imputed <- df_christmas_missing %>%
    filter(variable == "y_christmas") %>%
    head(1) %>%
    select(-date, -variable) %>%
    pivot_longer(everything()) %>%
    deframe()

removed <- total_df %>%
    filter(date %in% dates_missing) %>%
    summarize(sum(total)) %>%
    pull()

f_digit <- function(x) format(round(x, -3), big.mark = ",")
str_glue("Removed {format(removed, big.mark=',')} cases")
str_glue("Mean imputed cases: {f_digit(imputed['mean'])}+- {f_digit(imputed['sd'])}, 95% PI {f_digit(imputed['0.025'])} - {f_digit(imputed['0.975'])} ")
str_glue("Median imputed cases: {f_digit(imputed['0.5'])}")
'Removed 551,031 cases'
'Mean imputed cases: 618,000+- 59,000, 95% PI 512,000 - 743,000 '
'Median imputed cases: 615,000'
df_plot <- rbind(
    mutate(df_christmas, model = "all observations"),
    mutate(df_christmas_missing, model = "Christmas removed")
) %>%
    select(model, date, variable, mean, `0.025`, `0.975`) %>%
    filter(variable %in% c("I", "$\\rho$"))

total_df_smoothed <- total_df %>%
    mutate(total = rollmean(total, 7, 0, True, align = "center")) %>%
    filter(date %in% unique(df_plot$date)) %>%
    mutate(model = "weekly average cases")

plt_I <- df_plot %>%
    filter(variable == "I") %>%
    ggplot(aes(date, mean, color = model)) +
    geom_rect(xmin = min(dates_missing), xmax = max(dates_missing), ymin = -Inf, ymax = Inf, fill = "gray", alpha = .01, inherit.aes = F) +
    geom_line(aes(date, total), data = total_df_smoothed) +
    # geom_ribbon(aes(ymin = `0.025`, ymax = `0.975`, fill = model), alpha = .3) +
    geom_line(aes(y = `0.025`, color = model), linetype = "dotted") +
    geom_line(aes(y = `0.975`, color = model), linetype = "dotted") +
    geom_line() +
    labs(x = "", y = "$I$", fill = "", color = "") +
    scale_color_manual(values = c("all observations" = pal_npg()(3)[1], "Christmas removed" = pal_npg()(3)[2], "weekly average cases" = "black"))

plt_rho <- df_plot %>%
    filter(variable == "$\\rho$") %>%
    ggplot(aes(date, mean, color = model)) +
    # geom_rect(xmin = min(dates_missing), xmax = max(dates_missing), ymin = -Inf, ymax = Inf, fill = "gray80", alpha = .01, inherit.aes = F) +
    # geom_ribbon(aes(ymin = `0.025`, ymax = `0.975`, fill = model), alpha = .3) +
    geom_line(aes(y = `0.025`, color = model), linetype = "dotted", show.legend = F) +
    geom_line(aes(y = `0.975`, color = model), linetype = "dotted", show.legend = F) +
    geom_line(show.legend = F) +
    labs(x = "", y = "$\\rho$", fill = "", color = "")


plt_I / plt_rho + plot_layout(guides = "collect") & theme(legend.position = "bottom") & scale_x_four_weekly()

ggsave_tikz(here("tikz/christmas_prediction_intervals_I_rho.tex"))
pdf: 2

median in christmas period smaller than mean for missing model

df_christmas_missing %>%
    filter(variable == "I") %>%
    select(date, mean, `0.5`) %>%
    pivot_longer(-date) %>%
    ggplot(aes(date, value, color = name)) +
    geom_line()

tikz/christmas_delay_probs.tex

df_plot <- rbind(
    mutate(df_christmas, model = "all observations"),
    mutate(df_christmas_missing, model = "Christmas removed")
) %>%
    select(model, date, variable, mean, `0.025`, `0.975`) %>%
    filter(str_starts(variable, "\\$p\\^s")) %>%
    mutate(delay = str_extract(variable, "\\d+")) %>%
    select(-variable)


df_plot %>%
    ggplot(aes(date, 100 * mean, color = delay, linetype = model)) +
    geom_line() +
    ylim(0, 100) +
    scale_x_four_weekly() +
    labs(x = "", y = "probability of delay [\\%]", color = "$\\tau$", linetype = "") +
    theme(legend.position = "bottom")

ggsave_tikz(here("tikz/christmas_delay_probs.tex"), height = 1 / 2 * default_height)
pdf: 2